{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import onnxruntime_genai as og\n",
    "import argparse\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = og.Model('..\\..\\..\\..\\Models\\Phi-3-mini-4k-instruct-onnx\\directml\\directml-int4-awq-block-128')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = og.Tokenizer(model)\n",
    "tokenizer_stream = tokenizer.create_stream()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "search_options = {\"max_length\": 1024,\"temperature\":0.6}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "params = og.GeneratorParams(model)\n",
    "params.try_use_cuda_graph_with_max_batch_size(1)\n",
    "params.set_search_options(**search_options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = \"<|system|>You are a helpful AI assistant.<|end|><|user|>Can you introduce yourself?<|end|><|assistant|>\"\n",
    "input_tokens = tokenizer.encode(prompt)\n",
    "params.input_ids = input_tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "generator = og.Generator(model, params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Of course! I'm an AI developed by Microsoft, designed to assist and provide information to users like you. My purpose is to help answer your questions, provide guidance, and offer support in various areas such as general knowledge, research, and problem-solving. I'm powered by advanced machine learning algorithms, enabling me to understand and respond to your queries effectively. While I don't have personal experiences or emotions, I'm here to make your interactions as helpful and informative as possible. How can I assist you today?"
     ]
    }
   ],
   "source": [
    "while not generator.is_done():\n",
    "                generator.compute_logits()\n",
    "                generator.generate_next_token()\n",
    "\n",
    "                new_token = generator.get_next_tokens()[0]\n",
    "                print(tokenizer_stream.decode(new_token), end='', flush=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
